# Show the compact structure (column names, types, first values) of mpg.
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
# Quick scatterplot of hwy against displ.
qplot(displ, hwy, data = mpg)
Modifying aesthetics
# Same scatterplot, with point colour mapped to drv.
qplot(displ, hwy, data = mpg, color = drv)
Adding a geom
# Scatterplot with a smoother drawn over the points.
qplot(displ, hwy, data = mpg, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'
Histograms
# Supplying only x gives a histogram; bars are filled by drv.
qplot(hwy, data = mpg, fill = drv)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Facets
# One scatterplot panel per drv value, laid out as columns.
qplot(displ, hwy, data = mpg, facets = . ~ drv)
# One histogram panel per drv value, laid out as rows.
qplot(hwy, data = mpg, binwidth = 2, facets = drv ~ .)
Reading the MAACS data
# Restore the objects saved in the .Rda file, then show the first rows.
load("~/Downloads/maacs.Rda")
head(maacs)
## id eno duBedMusM pm25 mopos
## 1 1 141 2423 15.560 yes
## 2 2 124 2793 34.370 yes
## 3 3 126 3055 38.953 yes
## 4 4 164 775 33.249 yes
## 5 5 99 1634 27.060 yes
## 6 6 68 939 18.890 yes
# Column types and leading values of maacs.
str(maacs)
## 'data.frame': 750 obs. of 5 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ eno : num 141 124 126 164 99 68 41 50 12 30 ...
## $ duBedMusM: num 2423 2793 3055 775 1634 ...
## $ pm25 : num 15.6 34.4 39 33.2 27.1 ...
## $ mopos : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
Histogram of eNO
# Histogram of log(eno).
qplot(log(eno), data = maacs)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 108 rows containing non-finite values (stat_bin).
Histogram by groups
# Histogram of log(eno), bars filled by mopos.
qplot(log(eno), data = maacs, fill = mopos)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 108 rows containing non-finite values (stat_bin).
Density smooth
# Density estimate of log(eno) for the whole data set.
qplot(log(eno), data = maacs, geom = "density")
## Warning: Removed 108 rows containing non-finite values (stat_density).
# One density curve per mopos level, distinguished by colour.
qplot(log(eno), data = maacs, geom = "density", color = mopos)
## Warning: Removed 108 rows containing non-finite values (stat_density).
Scatterplots
# Scatterplot of log(eno) against log(pm25).
ggplot(maacs, aes(log(pm25), log(eno))) +
  geom_point()
## Warning: Removed 184 rows containing missing values (geom_point).
# Same scatterplot, point shape mapped to mopos.
ggplot(maacs, aes(log(pm25), log(eno), shape = mopos)) +
  geom_point()
## Warning: Removed 184 rows containing missing values (geom_point).
# Colour mapped to mopos in the plot-level aes, so the points and the
# linear fits are both split by mopos.
ggplot(maacs, aes(log(pm25), log(eno), color = mopos)) +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).
Using facets
# Store the scatterplot plus linear fit so it can be reused.
gg <- ggplot(maacs, aes(log(pm25), log(eno))) +
  geom_point() +
  geom_smooth(method = "lm")
# Printing a ggplot object draws it.
print(gg)
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).
# qplot version with one panel per mopos level and a linear fit added.
qplot(log(pm25), log(eno), data = maacs, facets = . ~ mopos) +
  geom_smooth(method = "lm")
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).
# gg + facet_wrap(.~mopos)
Building up in layers
# Load the second maacs file and show the first rows of columns 6 to 8.
load("~/Downloads/maacs (1).Rda")
head(maacs[, 6:8])
## logpm25 NocturnalSympt bmicat
## 1 1.192010 0 normal weight
## 2 1.536180 0 overweight
## 3 1.590541 2 overweight
## 4 1.521779 2 normal weight
## 5 1.432328 2 normal weight
## 6 1.276232 2 normal weight
Plotting
load("~/Downloads/maacs (1).Rda")
# Base plot object: data plus x/y mapping, no layers yet.
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))
# Print the data, mapping and faceting stored in the plot object.
summary(g)
## data: id, eno, duBedMusM, pm25, mopos, logpm25, NocturnalSympt,
## bmicat, logno2_new [750x9]
## mapping: x = logpm25, y = NocturnalSympt
## faceting: <ggproto object: Class FacetNull, Facet>
## compute_layout: function
## draw_back: function
## draw_front: function
## draw_labels: function
## draw_panels: function
## finish_data: function
## init_scales: function
## map: function
## map_data: function
## params: list
## render_back: function
## render_front: function
## render_panels: function
## setup_data: function
## setup_params: function
## shrink: TRUE
## train: function
## train_positions: function
## train_scales: function
## vars: function
## super: <ggproto object: Class FacetNull, Facet>
# Draw the plot object as it stands, before any geom is added.
print(g)
# Add a point layer and draw again.
p <- g + geom_point()
print(p)
## Warning: Removed 134 rows containing missing values (geom_point).
Adding more layers: smooth and facets
# Points plus the default smoother.
g +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).
# Points plus a linear-model fit.
g +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).
# Points and linear fit, one panel per bmicat level.
g +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ bmicat)
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).
Modifying aesthetics
# Constant aesthetics: every point gets the same colour, size and alpha.
g +
  geom_point(size = 4, alpha = 0.5, color = "steelblue")
## Warning: Removed 134 rows containing missing values (geom_point).
# Mapped aesthetic: colour comes from bmicat via aes(); size and alpha
# remain constants.
g +
  geom_point(aes(color = bmicat), size = 4, alpha = 0.5)
## Warning: Removed 134 rows containing missing values (geom_point).
Modifying labels
# Title and axis labels set in one labs() call; expression() renders
# the 2.5 as a subscript on PM.
g +
  geom_point(aes(color = bmicat)) +
  labs(
    title = "MAACS Cohort",
    x = expression("log " * PM[2.5]),
    y = "Nocturnal Symptoms"
  )
## Warning: Removed 134 rows containing missing values (geom_point).
Customizing the smooth
# Linear fit drawn with linetype 3 at size 4, with the confidence band
# switched off (se = FALSE).
g +
  geom_point(aes(color = bmicat), size = 2, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 4, linetype = 3)
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).
Changing the theme
# Use the black-and-white theme with "Times" as the base font family.
g +
  geom_point(aes(color = bmicat)) +
  theme_bw(base_family = "Times")
## Warning: Removed 134 rows containing missing values (geom_point).
Notes about Axis Limits
With base plot
# 100 standard-normal draws indexed 1..100.
testdat <- data.frame(x = seq_len(100), y = rnorm(100))
# Plant a single outlier at observation 50.
testdat$y[50] <- 100
# Base graphics line plot with the y axis limited to [-3, 3].
plot(testdat$x, testdat$y, type = "l", ylim = c(-3, 3))
With ggplot
# Same series as a ggplot line plot with default axis limits.
g <- ggplot(testdat, aes(x, y))
g +
  geom_line()
Exploring the MAACS data again
# Reload the maacs file and show the first rows.
load("~/Downloads/maacs (1).Rda")
head(maacs)
## id eno duBedMusM pm25 mopos logpm25 NocturnalSympt bmicat
## 1 1 141 2423 15.560 yes 1.192010 0 normal weight
## 2 2 124 2793 34.370 yes 1.536180 0 overweight
## 3 3 126 3055 38.953 yes 1.590541 2 overweight
## 4 4 164 775 33.249 yes 1.521779 2 normal weight
## 5 5 99 1634 27.060 yes 1.432328 2 normal weight
## 6 6 68 939 18.890 yes 1.276232 2 normal weight
## logno2_new
## 1 1.617849
## 2 1.884490
## 3 1.712953
## 4 1.458879
## 5 1.294510
## 6 1.468377
# Cutpoints that split logno2_new into three equal-count groups: the
# 0%, 33.3%, 66.7% and 100% quantiles, computed with NAs dropped.
# length.out is spelled out rather than relying on partial matching of
# `length`.
(cutpoints <- quantile(maacs$logno2_new, seq(0, 1, length.out = 4), na.rm = TRUE))
## 0% 33.33333% 66.66667% 100%
## -0.6289321 1.1828710 1.4418993 2.4775279
# cut() builds right-closed intervals (a, b], so by default the observation
# equal to the lowest cutpoint (the minimum) falls outside every bin and
# becomes NA. include.lowest = TRUE closes the first interval on the left
# so that observation is kept in the lowest group.
maacs$no2dec <- cut(maacs$logno2_new, cutpoints, include.lowest = TRUE)
levels(maacs$no2dec)
## [1] "[-0.629,1.18]" "(1.18,1.44]" "(1.44,2.48]"
Code for final plot
# Base plot: logpm25 on x, NocturnalSympt on y.
g <- ggplot(maacs, aes(logpm25, NocturnalSympt))
## Add layers
# Semi-transparent points, one panel per bmicat / no2dec combination,
# a linear fit without confidence band, then theme and labels.
g +
  geom_point(alpha = 1/3) +
  facet_wrap(bmicat ~ no2dec, nrow = 2, ncol = 4) +
  geom_smooth(method = "lm", se = FALSE, col = "steelblue") +
  theme_bw(base_family = "Avenir", base_size = 10) +
  labs(
    x = expression("log " * PM[2.5]),
    y = "Nocturnal Symptoms",
    title = "MAACS Cohort"
  )
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).